{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7-final"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "python3",
   "display_name": "DataAnalysis"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tools import *\n",
    "%matplotlib inline"
   ]
  },
  {
   "source": [
    "# 2.5 WordNet\n",
    "WordNet 是面向语义的英语词典，与传统的同义词词典类似，但结构更加丰富，可以用来查找同义词。"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "source": [
    "### 2.5.1 单词的意义 与 同义词"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "=============== >synsets() 查找同义词集的集合< ===============\n",
      "--------------- >motocar 没有同义词集的集合< ---------------\n",
      "wn.synsets('motocar')=  []\n"
     ]
    }
   ],
   "source": [
    "from nltk.corpus import wordnet as wn\n",
    "\n",
    "show_title(\"synsets() 查找同义词集的集合\")\n",
    "show_subtitle(\"motocar 没有同义词集的集合\")\n",
    "print(\"wn.synsets('motocar')= \", wn.synsets('motocar'))  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "wn.synsets('motorcar')=  [Synset('car.n.01')]\nwn.synsets('car')=  [Synset('car.n.01'), Synset('car.n.02'), Synset('car.n.03'), Synset('car.n.04'), Synset('cable_car.n.01')]\nwn.synsets('auto')=  [Synset('car.n.01')]\nwn.synsets('automobile')=  [Synset('car.n.01'), Synset('automobile.v.01')]\nwn.synsets('machine')=  [Synset('machine.n.01'), Synset('machine.n.02'), Synset('machine.n.03'), Synset('machine.n.04'), Synset('machine.n.05'), Synset('car.n.01'), Synset('machine.v.01'), Synset('machine.v.02')]\n"
     ]
    }
   ],
   "source": [
    "# 以下单词的定义中都含有'car.n.01'\n",
    "print(\"wn.synsets('motorcar')= \", wn.synsets('motorcar'))\n",
    "print(\"wn.synsets('car')= \", wn.synsets('car'))\n",
    "print(\"wn.synsets('auto')= \", wn.synsets('auto'))\n",
    "print(\"wn.synsets('automobile')= \", wn.synsets('automobile'))\n",
    "print(\"wn.synsets('machine')= \", wn.synsets('machine'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "=============== >synset() 查找同义词集；synsets() 查找同义词集的集合< ===============\n--------------- >对单词查找同义词集，返回 ValueError< ---------------\n--------------- >对单词查找同义词集的集合< ---------------\nwn.synsets('car')=  [Synset('car.n.01'), Synset('car.n.02'), Synset('car.n.03'), Synset('car.n.04'), Synset('cable_car.n.01')]\n--------------- >对同义词定义查找同义词集< ---------------\nwn.synset('car.n.01')=  Synset('car.n.01')\n--------------- >对同义词定义查找同义词集的集合< ---------------\nwn.synsets('car.n.01')=  []\n"
     ]
    }
   ],
   "source": [
    "show_title(\"synset() 查找同义词集；synsets() 查找同义词集的集合\")\n",
    "show_subtitle(\"对单词查找同义词集，返回 ValueError\")\n",
    "# print(\"wn.synset('car')= \", wn.synset('car'))\n",
    "show_subtitle(\"对单词查找同义词集的集合\")\n",
    "print(\"wn.synsets('car')= \", wn.synsets('car'))\n",
    "show_subtitle(\"对同义词定义查找同义词集\")\n",
    "print(\"wn.synset('car.n.01')= \", wn.synset('car.n.01'))\n",
    "show_subtitle(\"对同义词定义查找同义词集的集合\")\n",
    "print(\"wn.synsets('car.n.01')= \", wn.synsets('car.n.01'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "=============== >查找词元集合< ===============\n--------------- >对单词查找词元集合< ---------------\nwn.lemmas('car')=  [Lemma('car.n.01.car'), Lemma('car.n.02.car'), Lemma('car.n.03.car'), Lemma('car.n.04.car'), Lemma('cable_car.n.01.car')]\n--------------- >对同义词定义查找词元集合< ---------------\nwn.lemmas('car.n.01')=  []\n--------------- >同义词集的词元集合< ---------------\nwn.synset('car.n.01').lemmas()=  [Lemma('car.n.01.car'), Lemma('car.n.01.auto'), Lemma('car.n.01.automobile'), Lemma('car.n.01.machine'), Lemma('car.n.01.motorcar')]\n--------------- >同义词集的词元集合中对应的单词集合< ---------------\nwn.synset('car.n.01').lemma_names()=  ['car', 'auto', 'automobile', 'machine', 'motorcar']\n"
     ]
    }
   ],
   "source": [
    "show_title(\"查找词元集合\")\n",
    "show_subtitle(\"对单词查找词元集合\")\n",
    "print(\"wn.lemmas('car')= \", wn.lemmas('car'))\n",
    "show_subtitle(\"对同义词定义查找词元集合\")\n",
    "print(\"wn.lemmas('car.n.01')= \", wn.lemmas('car.n.01'))\n",
    "show_subtitle(\"同义词集的词元集合\")\n",
    "print(\"wn.synset('car.n.01').lemmas()= \", wn.synset('car.n.01').lemmas()) \n",
    "show_subtitle(\"同义词集的词元集合中对应的单词集合\")\n",
    "print(\"wn.synset('car.n.01').lemma_names()= \", wn.synset('car.n.01').lemma_names())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Synset('car.n.01') = ['car', 'auto', 'automobile', 'machine', 'motorcar']\nSynset('car.n.02') = ['car', 'railcar', 'railway_car', 'railroad_car']\nSynset('car.n.03') = ['car', 'gondola']\nSynset('car.n.04') = ['car', 'elevator_car']\nSynset('cable_car.n.01') = ['cable_car', 'car']\n"
     ]
    }
   ],
   "source": [
    "# 先查找单词的同义词定义集合，再对同义词定义查找相应的词元集合对应的单词集合\n",
    "for synset in wn.synsets('car'):\n",
    "    print(synset,'=', synset.lemma_names())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "--------------- >单词的含义< ---------------\nwn.synset('car.n.01').definition()=  a motor vehicle with four wheels; usually propelled by an internal combustion engine\n"
     ]
    }
   ],
   "source": [
    "show_subtitle(\"单词的含义\")\n",
    "print(\"wn.synset('car.n.01').definition()= \", wn.synset('car.n.01').definition())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "--------------- >单词的句子样例< ---------------\nwn.synset('car.n.01').examples()=  ['he needs a car to get to work']\n"
     ]
    }
   ],
   "source": [
    "show_subtitle(\"单词的句子样例\")\n",
    "print(\"wn.synset('car.n.01').examples()= \", wn.synset('car.n.01').examples())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "--------------- >对同义词定义查找词元集合< ---------------\nwn.lemma('car.n.01.automobile')=  Lemma('car.n.01.automobile')\n--------------- >对同义词定义查找同义词集< ---------------\nwn.lemma('car.n.01.automobile').synset=  Synset('car.n.01')\n--------------- >对同义词定义查找对应的单词< ---------------\nwn.lemma('car.n.01.automobile').name=  automobile\n"
     ]
    }
   ],
   "source": [
    "show_subtitle(\"对同义词定义查找词元集合\")\n",
    "print(\"wn.lemma('car.n.01.automobile')= \", wn.lemma('car.n.01.automobile')) \n",
    "show_subtitle(\"对同义词定义查找同义词集\")\n",
    "print(\"wn.lemma('car.n.01.automobile').synset= \", wn.lemma('car.n.01.automobile').synset())  \n",
    "show_subtitle(\"对同义词定义查找对应的单词\")\n",
    "print(\"wn.lemma('car.n.01.automobile').name= \", wn.lemma('car.n.01.automobile').name())  "
   ]
  },
  {
   "source": [
    "### 2.5.2 WordNet 的层次结构\n",
    "图2-8：WordNet 概念的层次\n",
    "-   每个节点对应一个同义词集\n",
    "-   边表示上位词/下位词关系，即上级概念 与 下级概念 的关系"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "motorcar 上位词=  [Synset('motor_vehicle.n.01')]\n",
      "len(motorcar 下位词)=  31\n",
      "--------------- >motorcar 下位词列表< ---------------\n",
      "Synset('ambulance.n.01')\n",
      "Synset('beach_wagon.n.01')\n",
      "Synset('bus.n.04')\n",
      "Synset('cab.n.03')\n",
      "Synset('compact.n.03')\n",
      "Synset('convertible.n.01')\n",
      "Synset('coupe.n.01')\n",
      "Synset('cruiser.n.01')\n",
      "Synset('electric.n.01')\n",
      "Synset('gas_guzzler.n.01')\n",
      "Synset('hardtop.n.01')\n",
      "Synset('hatchback.n.01')\n",
      "Synset('horseless_carriage.n.01')\n",
      "Synset('hot_rod.n.01')\n",
      "Synset('jeep.n.01')\n",
      "Synset('limousine.n.01')\n",
      "Synset('loaner.n.02')\n",
      "Synset('minicar.n.01')\n",
      "Synset('minivan.n.01')\n",
      "Synset('model_t.n.01')\n",
      "Synset('pace_car.n.01')\n",
      "Synset('racer.n.02')\n",
      "Synset('roadster.n.01')\n",
      "Synset('sedan.n.01')\n",
      "Synset('sport_utility.n.01')\n",
      "Synset('sports_car.n.01')\n",
      "Synset('stanley_steamer.n.01')\n",
      "Synset('stock_car.n.01')\n",
      "Synset('subcompact.n.01')\n",
      "Synset('touring_car.n.01')\n",
      "Synset('used-car.n.01')\n"
     ]
    }
   ],
   "source": [
    "motorcar = wn.synset('car.n.01')\n",
    "print(\"motorcar 上位词= \", motorcar.hypernyms())\n",
    "types_of_motorcar= motorcar.hyponyms()\n",
    "print(\"len(motorcar 下位词)= \", len(types_of_motorcar))\n",
    "show_subtitle(\"motorcar 下位词列表\")\n",
    "for motorcar_hyponyms in types_of_motorcar:\n",
    "    print(motorcar_hyponyms)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "len(lemma_of_motorcar)=  66\n--------------- >lemma_of_motorcar< ---------------\nModel_T\nS.U.V.\nSUV\nStanley_Steamer\nambulance\nbeach_waggon\nbeach_wagon\nbus\ncab\ncompact\ncompact_car\nconvertible\ncoupe\ncruiser\nelectric\nelectric_automobile\nelectric_car\nestate_car\ngas_guzzler\nhack\nhardtop\nhatchback\nheap\nhorseless_carriage\nhot-rod\nhot_rod\njalopy\njeep\nlandrover\nlimo\nlimousine\nloaner\nminicar\nminivan\npace_car\npatrol_car\nphaeton\npolice_car\npolice_cruiser\nprowl_car\nrace_car\nracer\nracing_car\nroadster\nrunabout\nsaloon\nsecondhand_car\nsedan\nsport_car\nsport_utility\nsport_utility_vehicle\nsports_car\nsquad_car\nstation_waggon\nstation_wagon\nstock_car\nsubcompact\nsubcompact_car\ntaxi\ntaxicab\ntourer\ntouring_car\ntwo-seater\nused-car\nwaggon\nwagon\n"
     ]
    }
   ],
   "source": [
    "# lemma()：词元集合\n",
    "lemma_of_motorcar = sorted([\n",
    "        lemma.name()\n",
    "        for synset in types_of_motorcar\n",
    "        for lemma in synset.lemmas()\n",
    "])\n",
    "print(\"len(lemma_of_motorcar)= \",len(lemma_of_motorcar))\n",
    "show_subtitle(\"lemma_of_motorcar\")\n",
    "for lemma in lemma_of_motorcar:\n",
    "    print(lemma)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "len(paths)=  2\n",
      "--------------- >通过hypernym_paths抵达motorcar的路径< ---------------\n",
      "[Synset('entity.n.01'), Synset('physical_entity.n.01'), Synset('object.n.01'), Synset('whole.n.02'), Synset('artifact.n.01'), Synset('instrumentality.n.03'), Synset('container.n.01'), Synset('wheeled_vehicle.n.01'), Synset('self-propelled_vehicle.n.01'), Synset('motor_vehicle.n.01'), Synset('car.n.01')]\n",
      "[Synset('entity.n.01'), Synset('physical_entity.n.01'), Synset('object.n.01'), Synset('whole.n.02'), Synset('artifact.n.01'), Synset('instrumentality.n.03'), Synset('conveyance.n.03'), Synset('vehicle.n.01'), Synset('wheeled_vehicle.n.01'), Synset('self-propelled_vehicle.n.01'), Synset('motor_vehicle.n.01'), Synset('car.n.01')]\n"
     ]
    }
   ],
   "source": [
    "# 到根结点的路径，可能会有多条，例如“汽车”被归类为“车辆”和“容器”\n",
    "motorcar = wn.synset('car.n.01')\n",
    "paths=motorcar.hypernym_paths()\n",
    "print(\"len(paths)= \",len(paths))\n",
    "show_subtitle(\"通过hypernym_paths抵达motorcar的路径\")\n",
    "for path in paths:\n",
    "    print(path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "--------------- >motorcar 的根节点< ---------------\n[Synset('entity.n.01')]\n--------------- >所有事物的根节点都是'entity.n.01'< ---------------\n[Synset('entity.n.01')]\n"
     ]
    }
   ],
   "source": [
    "show_subtitle(\"motorcar 的根节点\")\n",
    "print(motorcar.root_hypernyms())\n",
    "show_subtitle(\"所有事物的根节点都是'entity.n.01'\")\n",
    "print(wn.synset('love.n.01').root_hypernyms())"
   ]
  },
  {
   "source": [
    "### 2.5.3 更多的词汇关系\n",
    "-   同义词集关系：上位词 和 下位词 之间的关系被称为词汇关系\n",
    "-   包含和从属的关系：部分 和 整体 之间的关系也被称为词汇关系。例如：树 与 树叶"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "--------------- >部分-整体关系。树由树桩、树干、树冠、枝干、树节组成< ---------------\nwn.synset('tree.n.01').part_holonyms()=  []\nwn.synset('tree.n.01').part_meronyms()=  [Synset('burl.n.02'), Synset('crown.n.07'), Synset('limb.n.02'), Synset('stump.n.01'), Synset('trunk.n.01')]\nwn.synset('burl.n.02').part_holonyms()=  [Synset('tree.n.01')]\n"
     ]
    }
   ],
   "source": [
    "show_subtitle(\"部分-整体关系。树由树桩、树干、树冠、枝干、树节组成\")\n",
    "print(\"wn.synset('tree.n.01').part_holonyms()= \", wn.synset('tree.n.01').part_holonyms())\n",
    "print(\"wn.synset('tree.n.01').part_meronyms()= \", wn.synset('tree.n.01').part_meronyms())\n",
    "print(\"wn.synset('burl.n.02').part_holonyms()= \",wn.synset('burl.n.02').part_holonyms())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "--------------- >实质关系。树的实质是心材和边材< ---------------\nwn.synset('tree.n.01').substance_holonyms()=  []\nwn.synset('tree.n.01').substance_meronyms()=  [Synset('heartwood.n.01'), Synset('sapwood.n.01')]\nwn.synset('heartwood.n.01').substance_holonyms()=  [Synset('tree.n.01')]\n"
     ]
    }
   ],
   "source": [
    "show_subtitle(\"实质关系。树的实质是心材和边材\")\n",
    "print(\"wn.synset('tree.n.01').substance_holonyms()= \", wn.synset('tree.n.01').substance_holonyms())\n",
    "print(\"wn.synset('tree.n.01').substance_meronyms()= \", wn.synset('tree.n.01').substance_meronyms())\n",
    "print(\"wn.synset('heartwood.n.01').substance_holonyms()= \", wn.synset('heartwood.n.01').substance_holonyms())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "--------------- >集合关系。森林由树木和丛林组成< ---------------\nwn.synset('tree.n.01').member_holonyms()=  [Synset('forest.n.01')]\nwn.synset('forest.n.01').member_meronyms()=  [Synset('tree.n.01'), Synset('underbrush.n.01')]\n"
     ]
    }
   ],
   "source": [
    "show_subtitle(\"集合关系。森林由树木和丛林组成\")\n",
    "print(\"wn.synset('tree.n.01').member_holonyms()= \", wn.synset('tree.n.01').member_holonyms())\n",
    "print(\"wn.synset('forest.n.01').member_meronyms()= \", wn.synset('forest.n.01').member_meronyms())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "batch.n.02: (often followed by `of') a large number or amount or extent\nmint.n.02: any north temperate plant of the genus Mentha with aromatic leaves and small mauve flowers\nmint.n.03: any member of the mint family of plants\nmint.n.04: the leaves of a mint plant used fresh or candied\nmint.n.05: a candy that is flavored with a mint oil\nmint.n.06: a plant where money is coined by authority of the government\n"
     ]
    }
   ],
   "source": [
    "for synset in wn.synsets('mint', wn.NOUN):\n",
    "    print(synset.name() + ':', synset.definition())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "wn.synset('mint.n.01')=  Synset('batch.n.02')\nwn.synset('mint.n.01').definition()=  (often followed by `of') a large number or amount or extent\nwn.synset('batch.n.02').lemma_names()=  ['batch', 'deal', 'flock', 'good_deal', 'great_deal', 'hatful', 'heap', 'lot', 'mass', 'mess', 'mickle', 'mint', 'mountain', 'muckle', 'passel', 'peck', 'pile', 'plenty', 'pot', 'quite_a_little', 'raft', 'sight', 'slew', 'spate', 'stack', 'tidy_sum', 'wad']\n"
     ]
    }
   ],
   "source": [
    "print(\"wn.synset('mint.n.01')= \", wn.synset('mint.n.01'))\n",
    "print(\"wn.synset('mint.n.01').definition()= \", wn.synset('mint.n.01').definition())\n",
    "print(\"wn.synset('batch.n.02').lemma_names()= \", wn.synset('batch.n.02').lemma_names())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "wn.synset('mint.n.02')=  Synset('mint.n.02')\nwn.synset('mint.n.04').part_holonyms()=  [Synset('mint.n.02')]\nwn.synset('mint.n.04').part_meronyms()=  []\n"
     ]
    }
   ],
   "source": [
    "print(\"wn.synset('mint.n.02')= \", wn.synset('mint.n.02'))\n",
    "print(\"wn.synset('mint.n.04').part_holonyms()= \", wn.synset('mint.n.04').part_holonyms())\n",
    "print(\"wn.synset('mint.n.04').part_meronyms()= \", wn.synset('mint.n.04').part_meronyms())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "wn.synset('mint.n.05')=  Synset('mint.n.05')\nwn.synset('mint.n.04').substance_holonyms()=  [Synset('mint.n.05')]\nwn.synset('mint.n.04').substance_meronyms()=  []\n"
     ]
    }
   ],
   "source": [
    "print(\"wn.synset('mint.n.05')= \", wn.synset('mint.n.05'))\n",
    "print(\"wn.synset('mint.n.04').substance_holonyms()= \", wn.synset('mint.n.04').substance_holonyms())\n",
    "print(\"wn.synset('mint.n.04').substance_meronyms()= \", wn.synset('mint.n.04').substance_meronyms())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "wn.synset('mint.n.04').member_holonyms()=  []\nwn.synset('mint.n.04').member_meronyms()=  []\n"
     ]
    }
   ],
   "source": [
    "print(\"wn.synset('mint.n.04').member_holonyms()= \", wn.synset('mint.n.04').member_holonyms())\n",
    "print(\"wn.synset('mint.n.04').member_meronyms()= \", wn.synset('mint.n.04').member_meronyms())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "--------------- >蕴涵: entailments()< ---------------\n",
      "wn.synset('mint.n.04').entailments()=  []\n",
      "wn.synset('walk.v.01').entailments()=  [Synset('step.v.01')]\n",
      "wn.synset('eat.v.01').entailments()=  [Synset('chew.v.01'), Synset('swallow.v.01')]\n",
      "wn.synset('tease.v.03').entailments()=  [Synset('arouse.v.07'), Synset('disappoint.v.01')]\n"
     ]
    }
   ],
   "source": [
    "show_subtitle(\"蕴涵: entailments()\")\n",
    "print(\"wn.synset('mint.n.04').entailments()= \", wn.synset('mint.n.04').entailments())\n",
    "\n",
    "# 动词之间的关系\n",
    "print(\"wn.synset('walk.v.01').entailments()= \", wn.synset('walk.v.01').entailments())\n",
    "print(\"wn.synset('eat.v.01').entailments()= \", wn.synset('eat.v.01').entailments())\n",
    "print(\"wn.synset('tease.v.03').entailments()= \", wn.synset('tease.v.03').entailments())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "--------------- >不能通过同义词集寻找反义词< ---------------\n",
      "--------------- >反义词: antonyms()< ---------------\n",
      "wn.lemma('supply.n.02.supply')=  Lemma('supply.n.02.supply')\n",
      "wn.lemma('supply.n.02.supply').antonyms()=  [Lemma('demand.n.02.demand')]\n",
      "wn.lemma('rush.v.01.rush').antonyms()=  [Lemma('linger.v.04.linger')]\n",
      "wn.lemma('horizontal.a.01.horizontal')=  Lemma('horizontal.a.01.horizontal')\n",
      "wn.lemma('horizontal.a.01.horizontal').antonyms()=  [Lemma('vertical.a.01.vertical'), Lemma('inclined.a.02.inclined')]\n",
      "wn.lemma('staccato.r.01.staccato').antonyms()=  [Lemma('legato.r.01.legato')]\n"
     ]
    }
   ],
   "source": [
    "show_subtitle(\"不能通过同义词集寻找反义词\")\n",
    "# print(\"wn.synset('supply.n.02.supply').antonyms()= \", wn.synset('supply.n.02.supply').antonyms())\n",
    "show_subtitle(\"反义词: antonyms()\")\n",
    "print(\"wn.lemma('supply.n.02.supply')= \", wn.lemma('supply.n.02.supply'))\n",
    "print(\"wn.lemma('supply.n.02.supply').antonyms()= \", wn.lemma('supply.n.02.supply').antonyms())\n",
    "print(\"wn.lemma('rush.v.01.rush').antonyms()= \", wn.lemma('rush.v.01.rush').antonyms())\n",
    "print(\"wn.lemma('horizontal.a.01.horizontal')= \", wn.lemma('horizontal.a.01.horizontal'))\n",
    "print(\"wn.lemma('horizontal.a.01.horizontal').antonyms()= \", wn.lemma('horizontal.a.01.horizontal').antonyms())\n",
    "print(\"wn.lemma('staccato.r.01.staccato').antonyms()= \", wn.lemma('staccato.r.01.staccato').antonyms())"
   ]
  },
  {
   "source": [
    "### 2.5.4 语义相似度\n",
    "同义词集是由复杂的词汇关系网络连接起来的。给定一个同义词集，可以遍历 WordNet 网络来查找相关含义的同义词集。\n",
    "\n",
    "每个同义词集都有一个 或者 多个 上位词路径连接到一个 根上位词。\n",
    "\n",
    "连接到同一个根的两个同义词集可能有一些共同的上位词。\n",
    "\n",
    "如果两个同义词集共用一个特定的上位词——在上位词层次结构中处于较低层——它们一定有密切的联系。\n",
    "\n",
    "下面使用 lowest_common_hypernyms()、min_depth() 和 path_similarity() 来度量拥有共同上位词的同义词集之间的距离。"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "--------------- >right 与 minke 的相同上位词< ---------------\n",
      "[Synset('baleen_whale.n.01')]\n",
      "wn.synset('baleen_whale.n.01').min_depth()=  14\n",
      "--------------- >right 与 orca 的相同上位词< ---------------\n",
      "[Synset('whale.n.02')]\n",
      "wn.synset('whale.n.02').min_depth()=  13\n",
      "--------------- >right 与 tortoise 的相同上位词< ---------------\n",
      "[Synset('vertebrate.n.01')]\n",
      "wn.synset('vertebrate.n.01').min_depth()=  8\n",
      "--------------- >right 与 novel 的相同上位词< ---------------\n",
      "[Synset('entity.n.01')]\n",
      "wn.synset('entity.n.01').min_depth()=  0\n"
     ]
    }
   ],
   "source": [
    "right = wn.synset('right_whale.n.01')\n",
    "\n",
    "minke = wn.synset('minke_whale.n.01')\n",
    "show_subtitle(\"right 与 minke 的相同上位词\")\n",
    "print(right.lowest_common_hypernyms(minke))\n",
    "print(\"wn.synset('baleen_whale.n.01').min_depth()= \", wn.synset('baleen_whale.n.01').min_depth())\n",
    "\n",
    "show_subtitle(\"right 与 orca 的相同上位词\")\n",
    "orca = wn.synset('orca.n.01')\n",
    "print(right.lowest_common_hypernyms(orca))\n",
    "print(\"wn.synset('whale.n.02').min_depth()= \", wn.synset('whale.n.02').min_depth())\n",
    "\n",
    "show_subtitle(\"right 与 tortoise 的相同上位词\")\n",
    "tortoise = wn.synset('tortoise.n.01')\n",
    "print(right.lowest_common_hypernyms(tortoise))\n",
    "print(\"wn.synset('vertebrate.n.01').min_depth()= \", wn.synset('vertebrate.n.01').min_depth())\n",
    "\n",
    "show_subtitle(\"right 与 novel 的相同上位词\")\n",
    "novel = wn.synset('novel.n.01')\n",
    "print(right.lowest_common_hypernyms(novel))\n",
    "print(\"wn.synset('entity.n.01').min_depth()= \", wn.synset('entity.n.01').min_depth())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "--------------- >路径相似度度量< ---------------\n",
      "right.path_similarity(minke)=  0.25\n",
      "right.path_similarity(orca)=  0.16666666666666666\n",
      "right.path_similarity(tortoise)=  0.07692307692307693\n",
      "right.path_similarity(novel)=  0.043478260869565216\n"
     ]
    }
   ],
   "source": [
    "show_subtitle(\"路径相似度度量\")\n",
    "print(\"right.path_similarity(minke)= \",right.path_similarity(minke))\n",
    "print(\"right.path_similarity(orca)= \",right.path_similarity(orca))\n",
    "print(\"right.path_similarity(tortoise)= \",right.path_similarity(tortoise))\n",
    "print(\"right.path_similarity(novel)= \",right.path_similarity(novel))"
   ]
  },
  {
   "source": [
    "## 2.6 小结\n",
    "\n",
    "-   文本语料库是一个大型结构化文本的集合\n",
    "-   条件频率分布是频率分布的集合，每个分布都有不同的条件\n",
    "-   WordNet 是一个面向语义的英文词典，由同义词的集合（称为同义词集，synsets）组成，这些同义词集相互连接成一个网络"
   ],
   "cell_type": "markdown",
   "metadata": {}
  }
 ]
}